LSTM & GRU¶
In [1]:
Copied!
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
from IPython.display import Image
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
from IPython.display import Image
定义输入数据¶
假设输入样本数量为1000,每个样本的特征数量为15,每个小批量中时间步数为10,批量大小为64,同时,每个样本都对应一个标量标签。
输入的形状:如果batch_first为False,那么输入形状为(seq_len, batch_size, input_size),True则为(batch_size, seq_len, input_size)
In [2]:
Copied!
# seq_len表示时间步数,batch_size表示批量的大小,input_size表示每个时间步的输入特征的维度。
seq_len = 10
sample_size = 1000
features = 15
BATCH_SIZE = 64
X = torch.randn(seq_len, sample_size, features) # (seq_len, batch_size, input_size)
y = torch.randn(seq_len, sample_size, 1)
X = X.permute(1, 0, 2)
y = y.permute(1, 0, 2)
torch_dataset = TensorDataset(X, y)
train_size = int(sample_size * 0.8)
test_size = sample_size - train_size
trainset, testset = random_split(torch_dataset, [train_size, test_size])
trainloader = DataLoader(dataset=trainset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=2)
# seq_len表示时间步数,batch_size表示批量的大小,input_size表示每个时间步的输入特征的维度。
seq_len = 10
sample_size = 1000
features = 15
BATCH_SIZE = 64
X = torch.randn(seq_len, sample_size, features) # (seq_len, batch_size, input_size)
y = torch.randn(seq_len, sample_size, 1)
X = X.permute(1, 0, 2)
y = y.permute(1, 0, 2)
torch_dataset = TensorDataset(X, y)
train_size = int(sample_size * 0.8)
test_size = sample_size - train_size
trainset, testset = random_split(torch_dataset, [train_size, test_size])
trainloader = DataLoader(dataset=trainset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=2)
In [3]:
Copied!
for x in trainloader:
print(x)
break
for x in trainloader:
print(x)
break
[tensor([[[ 0.4540, 2.0505, 1.8005, ..., 0.4042, -1.7250, 1.0350],
[-0.8664, -1.3743, -0.0184, ..., -1.0852, 0.1087, 0.7777],
[-0.0795, 0.2301, -2.6513, ..., 0.4672, 1.5841, 1.0589],
...,
[ 0.0163, 0.2077, -0.9056, ..., -0.8597, -1.1141, 0.1540],
[ 0.4538, 1.9588, 1.0234, ..., -0.3064, -0.1393, 2.9072],
[-0.0232, 2.0684, -0.3065, ..., 0.2909, 1.0892, 1.1136]],
[[ 0.1901, 0.3496, 0.1059, ..., 2.3296, 1.1058, -1.9532],
[-0.6137, -0.1424, -0.8775, ..., 0.2460, 0.9742, -0.5776],
[ 2.4492, 1.1111, -0.1209, ..., 1.4041, 0.7044, 0.9197],
...,
[ 1.1801, 1.4396, 0.1354, ..., 1.1290, 0.3494, 0.7535],
[-0.2014, -0.5187, 0.9556, ..., 0.2538, 0.5075, 0.3611],
[ 1.1014, 0.0194, -1.2717, ..., 0.9697, -0.0345, 0.2379]],
[[-0.6724, 1.5309, -0.9318, ..., 0.8211, -0.0582, -0.6883],
[-0.0387, 0.2154, -0.2574, ..., -0.2483, -0.7780, 0.1357],
[ 0.2196, -1.8416, 0.1226, ..., -2.4800, 1.0984, -0.4491],
...,
[-0.7954, -0.3653, -1.4614, ..., 1.1284, -0.2406, -0.4149],
[-0.5738, 0.4865, 0.0638, ..., 1.8691, -0.8992, -0.4465],
[ 0.2563, -0.2688, 0.5947, ..., -0.9301, 0.2877, 0.0351]],
...,
[[-0.4572, 1.4819, 0.5794, ..., -1.3377, 0.0698, -0.8983],
[-0.5031, -0.8456, 0.2395, ..., -1.0274, 0.5153, -0.0449],
[ 0.7017, 0.4075, 1.2060, ..., -1.6530, -0.2479, -0.9618],
...,
[-0.6460, 0.3897, -1.4528, ..., 0.9944, 1.0757, 2.3098],
[-1.1047, -1.2345, 0.4144, ..., -1.4963, -1.9488, 0.4453],
[-1.0459, -0.0657, -1.2011, ..., -0.7865, -0.3292, -0.8563]],
[[ 0.2871, -0.3138, 0.7231, ..., -0.2983, 0.7018, -0.0636],
[ 0.4957, 0.3103, -1.6254, ..., -1.5479, 0.0831, -1.0979],
[-0.1899, 0.5540, -0.1424, ..., 0.9903, -0.1866, 1.5301],
...,
[-0.5276, 0.7333, -0.4807, ..., 0.3177, -0.8717, 1.5046],
[-0.1085, -0.5952, 1.2560, ..., -0.0697, -0.0551, -0.9879],
[-0.5258, 0.6290, 0.2147, ..., 2.4136, -0.1919, -1.4033]],
[[ 0.6398, -0.1231, 0.9040, ..., 0.6298, -0.3919, -0.3392],
[ 0.4048, 1.5550, -0.2902, ..., -0.0494, -0.5509, -0.7130],
[ 1.5928, 1.2198, -1.3477, ..., -0.1728, 0.3132, 1.0235],
...,
[-0.4726, -0.3622, -0.3287, ..., -0.1006, 1.0964, 0.4600],
[ 1.5430, 1.6871, -0.8182, ..., 0.9847, -1.6258, -0.2472],
[-1.1018, -0.9390, 1.2753, ..., -0.2903, -2.0172, 0.1515]]]), tensor([[[-1.5709],
[ 0.6907],
[-1.6860],
[-1.3964],
[-0.3577],
[ 2.2987],
[-1.8072],
[ 0.7713],
[ 0.4866],
[ 1.7622]],
[[ 0.5684],
[-0.1827],
[-0.0798],
[ 1.5299],
[ 0.6218],
[-0.0976],
[-0.1144],
[-0.1398],
[-1.1544],
[-0.4830]],
[[ 1.1454],
[-0.0628],
[-0.7226],
[-0.4345],
[ 0.0771],
[ 0.1056],
[ 0.8524],
[-0.4551],
[ 0.4099],
[-0.0734]],
[[-0.0347],
[-1.4483],
[-0.5597],
[ 0.4482],
[ 0.8384],
[-0.3172],
[ 0.1523],
[ 1.9179],
[-0.9762],
[-0.2537]],
[[ 0.0734],
[-0.1998],
[ 1.2260],
[ 0.0141],
[-0.5588],
[ 0.7336],
[-0.4237],
[-1.9729],
[-0.7819],
[ 0.8539]],
[[ 0.3507],
[ 2.4681],
[-0.6129],
[ 0.2836],
[-0.3749],
[ 0.4448],
[ 1.5269],
[-1.6124],
[-0.6096],
[-0.6592]],
[[-1.0949],
[ 0.9060],
[ 0.4175],
[ 1.0266],
[ 2.0856],
[-0.9929],
[-1.6886],
[-1.2776],
[-0.5418],
[ 0.8046]],
[[-0.1131],
[ 0.8197],
[-0.3971],
[ 0.0247],
[-0.6565],
[-0.0853],
[ 1.2847],
[-1.0787],
[-0.7871],
[ 0.1985]],
[[ 0.2310],
[-0.6677],
[-0.4424],
[-0.1500],
[-1.7082],
[-0.9802],
[ 0.2158],
[-1.2602],
[ 0.1445],
[ 0.5894]],
[[-1.4972],
[-0.6308],
[ 1.0187],
[-0.9368],
[ 0.1506],
[-0.9020],
[ 0.4657],
[ 0.4230],
[ 0.2151],
[-1.7368]],
[[-0.1018],
[-0.9252],
[ 0.1418],
[-2.0305],
[ 1.1622],
[ 0.4028],
[-1.6947],
[ 0.1442],
[ 0.3116],
[-0.0413]],
[[-1.8171],
[-0.9318],
[-0.9737],
[-1.0428],
[-0.0321],
[-2.0666],
[-1.9157],
[-0.4882],
[ 1.8116],
[ 0.5419]],
[[-0.6213],
[ 0.3593],
[-1.6192],
[-1.1549],
[-0.7091],
[-0.1382],
[ 0.7691],
[-0.5096],
[-1.1488],
[ 0.4640]],
[[ 0.0934],
[ 0.7345],
[-1.9699],
[-0.1501],
[-1.4518],
[ 0.8438],
[-0.9983],
[-1.0069],
[-0.4270],
[-0.3539]],
[[-0.7299],
[-0.8425],
[-0.0080],
[-0.3593],
[ 1.3911],
[-2.4460],
[-1.1120],
[ 0.9920],
[-0.4968],
[-1.4364]],
[[-0.1532],
[-0.8001],
[-0.9527],
[ 2.2140],
[ 0.8705],
[-0.7195],
[-0.5074],
[ 0.0318],
[-1.4823],
[ 2.4329]],
[[ 2.1200],
[-0.7386],
[-1.1488],
[-2.0068],
[-0.3175],
[-0.6614],
[ 1.0267],
[ 0.1238],
[-1.6986],
[-0.2849]],
[[-0.9482],
[ 1.2260],
[-1.3401],
[-0.5489],
[ 1.4622],
[ 0.6643],
[-0.5707],
[ 0.8380],
[ 0.3058],
[ 0.2090]],
[[ 1.7450],
[-0.2175],
[-2.1850],
[ 0.2406],
[-0.2823],
[-0.1151],
[ 0.4908],
[ 0.6332],
[-0.1500],
[-1.8751]],
[[ 0.7139],
[ 0.4172],
[-2.0881],
[ 0.1118],
[ 1.3186],
[-1.0013],
[ 0.1190],
[-0.3780],
[-0.5342],
[ 1.3879]],
[[-0.8204],
[-1.4511],
[-0.8640],
[-1.5900],
[ 1.1896],
[ 0.2786],
[ 1.8005],
[ 0.5636],
[ 1.6180],
[ 0.6934]],
[[-0.2423],
[-0.4656],
[ 0.0649],
[ 0.4830],
[ 1.0295],
[ 0.1270],
[ 0.3782],
[-2.8454],
[-0.2120],
[-0.0736]],
[[ 0.3179],
[ 0.7503],
[-1.0374],
[ 0.9465],
[ 0.5352],
[ 0.4448],
[ 0.4208],
[ 0.0478],
[-0.7716],
[-0.1559]],
[[-0.0952],
[-2.0084],
[-0.8770],
[-1.4925],
[ 1.5185],
[ 0.3389],
[ 1.9479],
[-1.5051],
[-0.0909],
[ 0.3004]],
[[ 0.2005],
[ 1.5029],
[-1.1880],
[-0.8149],
[-0.4484],
[-0.5238],
[ 0.8639],
[-0.1100],
[-1.2331],
[ 1.6497]],
[[-0.6505],
[ 0.3652],
[ 1.1299],
[ 0.5280],
[ 1.5002],
[-0.3228],
[-0.9457],
[ 0.6920],
[-0.4781],
[-0.0550]],
[[-0.4542],
[-1.5607],
[ 0.5638],
[ 0.9442],
[ 1.8492],
[ 1.2200],
[-2.3016],
[ 0.4301],
[-0.7828],
[-0.2936]],
[[ 0.1229],
[ 0.2418],
[-1.1512],
[ 0.4611],
[-0.8161],
[ 0.9149],
[ 0.0089],
[-0.6489],
[ 0.8479],
[-0.4264]],
[[-0.8771],
[-1.6645],
[-0.5761],
[-0.7062],
[-0.3473],
[ 0.7659],
[-1.0553],
[-1.0488],
[-0.8943],
[-0.6548]],
[[ 1.2723],
[-1.6011],
[ 0.2794],
[ 0.0332],
[ 0.9927],
[-0.4701],
[ 0.8528],
[ 1.1369],
[-1.1027],
[ 0.1231]],
[[ 0.3783],
[-0.2039],
[ 0.1211],
[-0.1222],
[ 0.0259],
[ 0.6850],
[ 0.2033],
[-0.8521],
[-1.3138],
[-0.5488]],
[[ 2.6347],
[-0.9334],
[ 0.0489],
[-0.7768],
[ 2.5205],
[ 1.1248],
[ 1.2863],
[-1.0040],
[-1.0829],
[ 0.9272]],
[[-1.2278],
[ 0.8895],
[-0.1237],
[-1.4893],
[ 2.2448],
[ 1.5004],
[ 0.9575],
[ 0.1816],
[ 0.3577],
[-0.1371]],
[[-0.9853],
[ 1.2098],
[-1.4069],
[-0.9601],
[ 0.5244],
[ 1.0759],
[ 0.5365],
[ 1.6918],
[-0.0706],
[-0.9400]],
[[ 0.4468],
[-0.0887],
[ 0.5069],
[-1.2028],
[ 0.2851],
[-1.9049],
[-0.4139],
[ 1.3335],
[-0.5851],
[ 1.3641]],
[[-0.1526],
[ 1.3827],
[ 2.5000],
[-0.5652],
[ 0.4264],
[-0.0350],
[-0.9127],
[-0.6392],
[-0.3261],
[ 0.1332]],
[[ 0.6819],
[-1.0206],
[-0.1230],
[-1.6336],
[ 1.0668],
[-0.9677],
[ 1.1188],
[-0.7318],
[-1.1702],
[ 1.0645]],
[[ 0.2888],
[ 1.0870],
[-1.0102],
[-0.1114],
[ 0.4641],
[ 0.1511],
[-1.5289],
[-0.4768],
[ 0.1281],
[ 0.9391]],
[[ 0.8516],
[-1.1553],
[ 0.2368],
[ 0.4949],
[ 0.3233],
[-0.1362],
[-0.0573],
[-1.1569],
[-0.6109],
[ 1.3095]],
[[-0.3924],
[-1.1999],
[-0.7581],
[ 0.1389],
[ 0.1853],
[-0.1724],
[-0.0943],
[ 1.5426],
[ 0.2150],
[-0.8276]],
[[-1.8525],
[-0.3655],
[ 0.1520],
[ 0.9895],
[ 0.2511],
[ 1.1828],
[ 0.3470],
[ 0.7860],
[ 0.5649],
[-0.6724]],
[[-1.2869],
[ 0.6137],
[ 1.1196],
[ 1.5099],
[ 1.1411],
[ 0.6749],
[-1.0315],
[-1.5448],
[ 1.0288],
[ 1.7036]],
[[ 0.4027],
[-0.7217],
[ 1.9991],
[ 1.1133],
[-0.6257],
[-1.1919],
[-0.9954],
[-0.2548],
[ 0.9436],
[-0.6414]],
[[ 0.5367],
[ 0.3248],
[ 0.5709],
[ 2.6988],
[ 0.6149],
[-0.0748],
[-1.4515],
[ 0.0661],
[ 0.3674],
[ 0.5656]],
[[ 0.1820],
[ 0.9259],
[ 0.5967],
[ 0.6928],
[ 0.3814],
[-0.3063],
[-0.2378],
[ 0.7602],
[ 1.2203],
[-0.3508]],
[[ 0.5464],
[-1.1729],
[ 0.8288],
[-1.3005],
[-0.8189],
[ 0.7396],
[ 0.1646],
[-2.1795],
[ 2.1045],
[-0.1535]],
[[ 1.7980],
[ 0.4960],
[ 1.1532],
[ 2.5535],
[-0.6819],
[-0.4232],
[ 0.9125],
[-0.9277],
[-0.7095],
[ 0.1194]],
[[ 0.4600],
[ 0.7014],
[-0.2445],
[ 1.0901],
[ 0.3241],
[-0.5549],
[-0.6748],
[ 0.8914],
[-0.0451],
[-0.3045]],
[[ 1.5078],
[-0.4262],
[-0.2154],
[-2.2366],
[-0.3359],
[-0.6068],
[-0.8079],
[ 0.5747],
[-1.1080],
[ 2.0380]],
[[-0.4562],
[-0.9502],
[ 0.2414],
[-0.1149],
[ 0.3826],
[-1.6368],
[ 0.1952],
[-0.6525],
[ 1.0410],
[ 0.4080]],
[[ 0.0213],
[ 1.6952],
[ 0.6122],
[-0.2915],
[ 0.8458],
[ 0.2689],
[ 0.7552],
[ 0.2123],
[ 0.6404],
[-0.7454]],
[[-0.2145],
[-1.3778],
[-0.4716],
[ 1.3725],
[-0.6932],
[-0.3569],
[ 0.3245],
[-1.0493],
[ 0.7275],
[-1.3761]],
[[-0.1883],
[ 0.6070],
[-0.2640],
[-0.0570],
[ 0.8877],
[ 0.7956],
[ 0.2761],
[ 0.7250],
[-0.7508],
[ 0.0156]],
[[-0.8689],
[-0.3800],
[ 0.3074],
[ 0.7342],
[ 0.9131],
[ 0.2531],
[-0.4266],
[-2.2457],
[ 0.9877],
[ 1.4258]],
[[-0.2649],
[-1.3330],
[ 2.0015],
[-0.3788],
[-1.0747],
[ 0.1831],
[-0.3347],
[-1.7849],
[-0.1281],
[ 0.9245]],
[[-0.4439],
[ 0.2421],
[ 0.2592],
[-1.1084],
[ 0.1238],
[ 0.6808],
[ 0.4579],
[ 0.5432],
[-0.4756],
[-1.2161]],
[[ 1.9364],
[ 0.5351],
[ 0.1978],
[ 0.8171],
[-1.2738],
[-1.9240],
[-0.3814],
[-0.7230],
[-0.8296],
[-0.3990]],
[[ 0.0366],
[ 0.5989],
[ 0.6553],
[ 1.3627],
[ 2.7729],
[-1.1558],
[ 1.2091],
[ 0.3231],
[ 0.1199],
[-0.6324]],
[[-0.8138],
[-1.6674],
[ 0.0269],
[-0.2478],
[ 0.0819],
[-1.7926],
[-0.1844],
[ 0.6594],
[-0.6274],
[-0.3338]],
[[-0.2131],
[-0.6385],
[ 0.8359],
[ 1.0082],
[ 1.4671],
[ 0.1550],
[-1.6600],
[-0.1142],
[ 0.4333],
[ 1.6088]],
[[-1.0346],
[-0.4436],
[-0.1512],
[-1.2718],
[-0.0598],
[-1.8043],
[-0.6428],
[-0.5077],
[-0.1200],
[-0.8708]],
[[-0.1190],
[-0.6906],
[-1.4925],
[-0.5035],
[-0.4851],
[-0.5124],
[ 1.1289],
[-0.8942],
[-0.4142],
[ 1.0863]],
[[-0.3967],
[ 1.1904],
[-1.5017],
[ 0.8212],
[-1.0112],
[ 0.5949],
[-2.4454],
[ 1.2159],
[-1.2109],
[-0.1800]],
[[-1.1689],
[-0.3799],
[ 1.4444],
[-0.9269],
[-0.2881],
[ 0.2412],
[ 1.7357],
[-1.4642],
[-1.9955],
[-0.4879]]])]
In [4]:
Copied!
x[0].shape, x[1].shape
x[0].shape, x[1].shape
Out[4]:
(torch.Size([64, 10, 15]), torch.Size([64, 10, 1]))
tensor.detach()方法用于返回一个新的Tensor,这个Tensor和原来的Tensor共享相同的内存空间,但是不会被计算图所追踪,也就是说它不会参与反向传播,不会影响到原有的计算图
In [2]:
Copied!
Image(filename='./data/LSTM_1.png')
Image(filename='./data/LSTM_1.png')
Out[2]:
In [3]:
Copied!
Image(filename='./data/LSTM_2.png')
Image(filename='./data/LSTM_2.png')
Out[3]:
In [6]:
Copied!
Image(filename='./data/LSTM_3.png')
Image(filename='./data/LSTM_3.png')
Out[6]:
In [8]:
Copied!
Image(filename='./data/LSTM_4.png')
Image(filename='./data/LSTM_4.png')
Out[8]:
定义LSTM¶
In [27]:
Copied!
class LSTM(nn.Module):
def __init__(self,
input_size,
hidden_size,
output_size,
num_layers: int=1,
bias: bool=True,
batch_first: bool=False,
dropout: float=0,
bidirectional: bool=False,
proj_size: int=0):
super(LSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
bias=bias,
batch_first=batch_first,
dropout=dropout,
bidirectional=bidirectional,
proj_size=proj_size)
# 使用这一步,我们可以自定义输出形状output_size
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
# 这里如果不提供h0,那么会默认使用zero tensor作为h0
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()
out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
out = self.fc(out)
return out, (hn, cn)
class LSTM(nn.Module):
def __init__(self,
input_size,
hidden_size,
output_size,
num_layers: int=1,
bias: bool=True,
batch_first: bool=False,
dropout: float=0,
bidirectional: bool=False,
proj_size: int=0):
super(LSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
bias=bias,
batch_first=batch_first,
dropout=dropout,
bidirectional=bidirectional,
proj_size=proj_size)
# 使用这一步,我们可以自定义输出形状output_size
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
# 这里如果不提供h0,那么会默认使用zero tensor作为h0
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()
out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
out = self.fc(out)
return out, (hn, cn)
定义一些参数并初始化模型¶
In [25]:
Copied!
BATCH_SIZE = 64
seq_len = 30
features = 40
sample_size = 1000
learning_rate = 0.01
num_epochs = 100
loss = nn.MSELoss()
lstm = LSTM(input_size=features, hidden_size=128, output_size=1, batch_first=True)
optim = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
BATCH_SIZE = 64
seq_len = 30
features = 40
sample_size = 1000
learning_rate = 0.01
num_epochs = 100
loss = nn.MSELoss()
lstm = LSTM(input_size=features, hidden_size=128, output_size=1, batch_first=True)
optim = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
训练模型¶
In [26]:
Copied!
# 训练模型
idx = 0
loss_list = []
for epoch in range(num_epochs):
for step, (inputs, targets) in enumerate(trainloader): # 每一步 loader 释放一小批数据用来学习
optim.zero_grad()
output_tensor, _ = lstm(inputs)
l = loss(output_tensor, targets)
l.backward()
optim.step()
if (epoch + 1) % 10 == 0:
idx += 1
loss_list.append(l)
print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {l.item():.4f}")
with torch.no_grad():
# plot the train loss
plt.plot(np.arange(idx), np.array(loss_list))
plt.xlabel('train epochs')
plt.ylabel('train loss')
plt.show()
# 训练模型
idx = 0
loss_list = []
for epoch in range(num_epochs):
for step, (inputs, targets) in enumerate(trainloader): # 每一步 loader 释放一小批数据用来学习
optim.zero_grad()
output_tensor, _ = lstm(inputs)
l = loss(output_tensor, targets)
l.backward()
optim.step()
if (epoch + 1) % 10 == 0:
idx += 1
loss_list.append(l)
print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {l.item():.4f}")
with torch.no_grad():
# plot the train loss
plt.plot(np.arange(idx), np.array(loss_list))
plt.xlabel('train epochs')
plt.ylabel('train loss')
plt.show()
Epoch [10/100], Loss: 0.2422 Epoch [20/100], Loss: 0.0581 Epoch [30/100], Loss: 0.0343 Epoch [40/100], Loss: 0.0318 Epoch [50/100], Loss: 0.0218 Epoch [60/100], Loss: 0.0228 Epoch [70/100], Loss: 0.0220 Epoch [80/100], Loss: 0.0191 Epoch [90/100], Loss: 0.0177 Epoch [100/100], Loss: 0.0169
模型在测试集上的表现¶
In [ ]:
Copied!
# 先将test_dataset逆转换为tensor的格式
with torch.no_grad():
test_data_x = [x[0] for x in testset]
test_data_y = [x[1] for x in testset]
test_data_x = torch.stack(test_data_x, dim=0)
test_data_y = torch.stack(test_data_y, dim=0)
# 先将test_dataset逆转换为tensor的格式
with torch.no_grad():
test_data_x = [x[0] for x in testset]
test_data_y = [x[1] for x in testset]
test_data_x = torch.stack(test_data_x, dim=0)
test_data_y = torch.stack(test_data_y, dim=0)
In [ ]:
Copied!
with torch.no_grad():
lstm_output = lstm(test_data_x)[0]
l = loss(lstm_output, test_data_y)
print(f'Test loss is {l}')
with torch.no_grad():
lstm_output = lstm(test_data_x)[0]
l = loss(lstm_output, test_data_y)
print(f'Test loss is {l}')
Test loss is 1.6294636726379395
在pytorch中,GRU的实现方式和LSTM几乎一致,最大的区别在于GRU中不包含候选状态ct
In [10]:
Copied!
Image(filename='./data/GRU_1.png')
Image(filename='./data/GRU_1.png')
Out[10]:
In [11]:
Copied!
Image(filename='./data/GRU_2.png')
Image(filename='./data/GRU_2.png')
Out[11]:
In [13]:
Copied!
Image(filename='./data/GRU_3.png')
Image(filename='./data/GRU_3.png')
Out[13]:
In [14]:
Copied!
Image(filename='./data/GRU_4.png')
Image(filename='./data/GRU_4.png')
Out[14]:
定义GRU¶
In [46]:
Copied!
class GRU(nn.Module):
def __init__(self,
input_size,
hidden_size,
output_size,
num_layers: int=1,
bias: bool=True,
batch_first: bool=False,
dropout: float=0,
bidirectional: bool=False):
super(GRU, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.gru = nn.GRU(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
bias=bias,
batch_first=batch_first,
dropout=dropout,
bidirectional=bidirectional)
# 使用这一步,我们可以自定义输出形状output_size
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
# 这里如果不提供h0,那么会默认使用zero tensor作为h0
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()
out, hn = self.gru(x, h0.detach())
out = self.fc(out)
return out, hn
class GRU(nn.Module):
def __init__(self,
input_size,
hidden_size,
output_size,
num_layers: int=1,
bias: bool=True,
batch_first: bool=False,
dropout: float=0,
bidirectional: bool=False):
super(GRU, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.gru = nn.GRU(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
bias=bias,
batch_first=batch_first,
dropout=dropout,
bidirectional=bidirectional)
# 使用这一步,我们可以自定义输出形状output_size
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
# 这里如果不提供h0,那么会默认使用zero tensor作为h0
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_()
out, hn = self.gru(x, h0.detach())
out = self.fc(out)
return out, hn
In [41]:
Copied!
inputs.shape
inputs.shape
Out[41]:
torch.Size([64, 30, 40])
定义一些参数并初始化模型¶
In [48]:
Copied!
BATCH_SIZE = 64
seq_len = 30
features = 40
sample_size = 1000
learning_rate = 0.01
num_epochs = 100
loss = nn.MSELoss()
gru = GRU(input_size=features, hidden_size=128, output_size=1, batch_first=True)
optim = torch.optim.Adam(gru.parameters(), lr=learning_rate)
BATCH_SIZE = 64
seq_len = 30
features = 40
sample_size = 1000
learning_rate = 0.01
num_epochs = 100
loss = nn.MSELoss()
gru = GRU(input_size=features, hidden_size=128, output_size=1, batch_first=True)
optim = torch.optim.Adam(gru.parameters(), lr=learning_rate)
训练模型¶
In [49]:
Copied!
# 训练模型
idx = 0
loss_list = []
for epoch in range(num_epochs):
for step, (inputs, targets) in enumerate(trainloader): # 每一步 loader 释放一小批数据用来学习
optim.zero_grad()
output_tensor, _ = gru(inputs)
l = loss(output_tensor, targets)
l.backward()
optim.step()
if (epoch + 1) % 10 == 0:
idx += 1
loss_list.append(l)
print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {l.item():.4f}")
with torch.no_grad():
# plot the train loss
plt.plot(np.arange(idx), np.array(loss_list))
plt.xlabel('train epochs')
plt.ylabel('train loss')
plt.show()
# 训练模型
idx = 0
loss_list = []
for epoch in range(num_epochs):
for step, (inputs, targets) in enumerate(trainloader): # 每一步 loader 释放一小批数据用来学习
optim.zero_grad()
output_tensor, _ = gru(inputs)
l = loss(output_tensor, targets)
l.backward()
optim.step()
if (epoch + 1) % 10 == 0:
idx += 1
loss_list.append(l)
print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {l.item():.4f}")
with torch.no_grad():
# plot the train loss
plt.plot(np.arange(idx), np.array(loss_list))
plt.xlabel('train epochs')
plt.ylabel('train loss')
plt.show()
Epoch [10/100], Loss: 0.3580 Epoch [20/100], Loss: 0.1204 Epoch [30/100], Loss: 0.0780 Epoch [40/100], Loss: 0.0581 Epoch [50/100], Loss: 0.0602 Epoch [60/100], Loss: 0.0508 Epoch [70/100], Loss: 0.0483 Epoch [80/100], Loss: 0.0553 Epoch [90/100], Loss: 0.0513 Epoch [100/100], Loss: 0.0651
模型在测试集上的表现¶
In [50]:
Copied!
# 先将test_dataset逆转换为tensor的格式
with torch.no_grad():
test_data_x = [x[0] for x in testset]
test_data_y = [x[1] for x in testset]
test_data_x = torch.stack(test_data_x, dim=0)
test_data_y = torch.stack(test_data_y, dim=0)
# 先将test_dataset逆转换为tensor的格式
with torch.no_grad():
test_data_x = [x[0] for x in testset]
test_data_y = [x[1] for x in testset]
test_data_x = torch.stack(test_data_x, dim=0)
test_data_y = torch.stack(test_data_y, dim=0)
In [51]:
Copied!
with torch.no_grad():
lstm_output = lstm(test_data_x)[0]
l = loss(lstm_output, test_data_y)
print(f'Test loss is {l}')
with torch.no_grad():
lstm_output = lstm(test_data_x)[0]
l = loss(lstm_output, test_data_y)
print(f'Test loss is {l}')
Test loss is 1.5890101194381714
参考链接¶
nn.LSTM
nn.GRU
LSTM输入形状
LSTM不一定在每个时间步都需要计算一个损失。根据具体问题,如果每组时间步只有最后一个时间步具有标签,那么可以只计算最后一个时间步的输出,并且和标签比对,输出一个损失。例如,输入样本形状为(batch, seq_len, feature),则LSTM的每个时间步都输入一个大小为(batch, feature)的样本。
这点其实是RNN不同模式的区别,具体见深度学习课件。